Daily Exercise 22

Time Series Practice with modeltime

Author

Josh Puyear

Use modeltime to forecast the next 12 months of streamflow data in the Poudre River based on last time assignment.

Loading libraries:

library(modeltime)
library(dataRetrieval)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tidyr)
library(lubridate)

Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
library(plotly)
Loading required package: ggplot2

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.0     ✔ stringr 1.5.1
✔ purrr   1.0.4     ✔ tibble  3.2.1
✔ readr   2.1.5     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ plotly::filter() masks dplyr::filter(), stats::filter()
✖ dplyr::lag()     masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(zoo)

Attaching package: 'zoo'

The following objects are masked from 'package:base':

    as.Date, as.Date.numeric
library(feasts)
Loading required package: fabletools
Registered S3 method overwritten by 'tsibble':
  method               from 
  as_tibble.grouped_df dplyr
library(tsibble)

Attaching package: 'tsibble'

The following object is masked from 'package:zoo':

    index

The following object is masked from 'package:lubridate':

    interval

The following objects are masked from 'package:base':

    intersect, setdiff, union
library(ggpubr)
library(ggplot2)
library(timetk)
library(rsample)
library(parsnip)

Attaching package: 'parsnip'

The following object is masked from 'package:fabletools':

    null_model
library(patchwork)

Previous assingment’s data.frame

#previous assignment's data table

poudre_flow <- readNWISdv(siteNumber = "06752260",    # Download data from USGS for site 06752260
                          parameterCd = "00060",      # Parameter code 00060 = discharge in cfs)
                          startDate = "2013-01-01",   # Set the start date
                          endDate = "2023-12-31") |>  # Set the end date
  renameNWISColumns() |>                              # Rename columns to standard names (e.g., "Flow", "Date")
  mutate(Date = yearmonth(Date)) |>                   # Convert daily Date values into a year-month format (e.g., "2023 Jan")
  group_by(Date) |>                                   # Group the data by the new monthly Date
  summarise(Flow = mean(Flow))
GET:https://waterservices.usgs.gov/nwis/dv/?site=06752260&format=waterml%2C1.1&ParameterCd=00060&StatCd=00003&startDT=2013-01-01&endDT=2023-12-31
#makina tsibble (also previous assignment)
poudre_flow <- as_tsibble(poudre_flow)
Using `Date` as index variable.
head(poudre_flow)
# A tsibble: 6 x 2 [1M]
      Date   Flow
     <mth>  <dbl>
1 2013 Jan  18.1 
2 2013 Feb  18.0 
3 2013 Mar   8.21
4 2013 Apr   5.94
5 2013 May 333.  
6 2013 Jun 300.  

Forecasting the next 12 months

Timeseries Split

poudre_flow <- poudre_flow |>
  as_tibble() |>
  mutate(date = as.Date(Date), index = NULL)


splits <- time_series_split(poudre_flow, assess = "60 months", cumulative = TRUE)
Using date_var: date
#assess = # of months as the testing set
#cumulative = TRUE makes all the preceeding data the training data.

training <-  training(splits)
testing  <-  testing(splits)

Making the models

mods <- list(
  arima_reg() |> set_engine("auto_arima"),  prophet_reg() |> set_engine("prophet")
                                          
)

Use the prophet_reg(), and arima_reg() function to create a Prophet model for forecasting.

models <- map(mods, ~ fit(.x, Flow ~ date, data = training))
frequency = 12 observations per 1 year
Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
#modeltime_table()

models_tbl <- as_modeltime_table(models)

#calibrate models- adds new column with test predictions and residuals
(calibration_table <- modeltime_calibrate(models_tbl, testing, quiet = FALSE))
# Modeltime Table
# A tibble: 2 × 5
  .model_id .model   .model_desc             .type .calibration_data
      <int> <list>   <chr>                   <chr> <list>           
1         1 <fit[+]> ARIMA(1,0,0)(0,1,0)[12] Test  <tibble [60 × 4]>
2         2 <fit[+]> PROPHET                 Test  <tibble [60 × 4]>
modeltime_accuracy(calibration_table) |>
  arrange(mae)
# A tibble: 2 × 9
  .model_id .model_desc             .type   mae  mape  mase smape  rmse   rsq
      <int> <chr>                   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1         1 ARIMA(1,0,0)(0,1,0)[12] Test   90.2  70.3 0.575  57.7  209. 0.457
2         2 PROPHET                 Test  180.  326.  1.15  150.   270. 0.645

predict flow for the next 12 months

forecast <- calibration_table |> modeltime_forecast(h = "12 months", new_data = testing,
actual_data = poudre_flow)

plot_modeltime_forecast(forecast)

Refit the data to the full dataset

refit_tbl <- calibration_table |>
  modeltime_refit(data = poudre_flow)
frequency = 12 observations per 1 year
Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
refit_forecast <- refit_tbl |>
  modeltime_forecast(h = "12 months", actual_data = poudre_flow) |> plot_modeltime_forecast()

#Refitting:

#Retrieves your model and preprocessing steps
#Refits the model to the new data
#Recalculates any automations. This includes:
#Recalculating the changepoints for the Earth Model
#Recalculating the ARIMA and ETS parameters
#Preserves any parameter selections. This includes:
#Any other defaults that are not automatic calculations are used.

Use dataRetrieval to download daily streamflow for the next 12 months. Aggregate this data to monthly averages and compare it to the predictions made by your modeltime model.

library(dataRetrieval)

poudre_24 <- readNWISdv(siteNumber = "06752260",    # Download data from USGS for site 06752260
                          parameterCd = "00060",      # Parameter code 00060 = discharge in cfs)
                          startDate = "2024-01-01",   # Set the start date
                          endDate = "2024-12-31") |>  # Set the end date
  renameNWISColumns() |>                              # Rename columns to standard names (e.g., "Flow", "Date")
  mutate(Date = yearmonth(Date)) |>                   # Convert daily Date values into a year-month format (e.g., "2023 Jan")
  group_by(Date) |>                                   # Group the data by the new monthly Date
  summarise(Flow = mean(Flow))
GET:https://waterservices.usgs.gov/nwis/dv/?site=06752260&format=waterml%2C1.1&ParameterCd=00060&StatCd=00003&startDT=2024-01-01&endDT=2024-12-31
poudre_24 <- poudre_24 %>% 
  as_tsibble() %>% 
  mutate(date = as.Date(Date))
Using `Date` as index variable.

Compute the R2 value between the model predictions and the observed data using a linear model and report the meaning.

forecast_arima <- forecast %>% 
  filter(.model_desc == "ARIMA(1,0,0)(0,1,0)[12]") %>% 
  mutate(date = .index)

model_lm <- lm(forecast_arima$.value ~ poudre_24$Flow)

summary(model_lm)

Call:
lm(formula = forecast_arima$.value ~ poudre_24$Flow)

Residuals:
    Min      1Q  Median      3Q     Max 
-182.44  -59.74  -28.93    1.45  480.58 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)  
(Intercept)     15.0032    65.5340   0.229   0.8235  
poudre_24$Flow   1.4480     0.4917   2.945   0.0147 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 174.4 on 10 degrees of freedom
Multiple R-squared:  0.4645,    Adjusted R-squared:  0.4109 
F-statistic: 8.674 on 1 and 10 DF,  p-value: 0.01466

Last, generate a plot of the Predicted vs Observed values and include a 1:1 line, and a linear model line.

join <- left_join(poudre_24, forecast_arima, by = "date")

gghistogram(forecast_arima$.value)
Warning: Using `bins = 30` by default. Pick better value with the argument
`bins`.

gghistogram(poudre_24$Flow)
Warning: Using `bins = 30` by default. Pick better value with the argument
`bins`.

join |>
 ggscatter(x = '.value', y = 'Flow') +
         labs(title = "Actual vs Prediced Flow on the Poudre",
              subtitle = "2024",
              x = "Forecasted Flow",
              y = "Measured Flow") + 
  geom_smooth(method = "lm") +
geom_abline(intercept = 0, slope = 1, color = "red") +
  theme_bw()
`geom_smooth()` using formula = 'y ~ x'